Farina in Natura - estratégia¶

Importando os dados e observações iniciais.¶

In [1]:
#.git log --pretty=format:%cI,%h,%an,%ae,%s >  C:\Users\ctobr\OneDrive\DATA_Science\1_farina_in_natura\COMMIT_LOG\commits.csv
In [2]:
# Código UBUNTU GIT LOG

# 1) git log --pretty=format:%cI,%h,%an,%ae,%s > /home/claudio/Farina001_GIT_LOG/commits.csv

# 2) abrir excel e e na aba dados, imrtar o arquivo gerado pelo código acima
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns; sns.set()
import datetime
import nbformat
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode (connected = True )
from IPython.display import display
import plotly.graph_objs as go
import plotly.offline as py
from matplotlib.backends.backend_pdf import PdfPages
from fpdf import FPDF
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import locale
In [4]:
# para resolver o problema: "(unicode error) 'utf-8' codec can't decode "
# https://www.youtube.com/watch?v=ZSC7X4N8RKo,
#Erro: 
# UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 900: invalid continuation byte
# enem = pd.read_csv("MICRODADOS_ENEM_2020.csv", encoding="utf-8", sep = ';')
# enem = pd.read_csv("MICRODADOS_ENEM_2020.csv", encoding="ISO-8859-1", sep = ';')

transactions = pd.read_csv(r'/home/claudio/farina_in_natura001/CSV/farina.CSV', 
encoding="ISO-8859-1", sep = ';')
In [5]:
transactions.head()
Out[5]:
venda data_hora cliente mesa item produto quantidade valor_unit desconto total_item
0 460.0 01/02/2022 08:13 CONSUMIDOR M 1 119.0 SUCO DE LARANJA 400ML 1.0 14,5 0.0 14,5
1 460.0 01/02/2022 08:13 CONSUMIDOR M 1 121.0 PAO SOURDOUGH OVOS 1.0 15,5 0.0 15,5
2 461.0 01/02/2022 08:14 CONSUMIDOR M 2 119.0 SUCO DE LARANJA 400ML 1.0 14,5 0.0 14,5
3 461.0 01/02/2022 08:14 CONSUMIDOR M 2 121.0 PAO SOURDOUGH OVOS 1.0 15,5 0.0 15,5
4 465.0 01/02/2022 09:01 CONSUMIDOR M 1 46.0 QUICHE BACON 90G 2.0 19,5 0.0 39
In [6]:
transactions.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25998 entries, 0 to 25997
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   venda       25992 non-null  float64
 1   data_hora   25992 non-null  object 
 2   cliente     25992 non-null  object 
 3   mesa        22779 non-null  object 
 4   item        25922 non-null  float64
 5   produto     25992 non-null  object 
 6   quantidade  25992 non-null  float64
 7   valor_unit  25992 non-null  object 
 8   desconto    25992 non-null  float64
 9   total_item  25992 non-null  object 
dtypes: float64(4), object(6)
memory usage: 2.0+ MB
In [ ]:
 

Limpeza e manipulação dos dados.¶

In [7]:
### CHECK OUT IF THE NUMBER OF TOTAL ENTRIES IS EQUAL TO THE NUMBER OF ENTRIES OF THE VARIABLES  

transactions.isnull().sum()
Out[7]:
venda            6
data_hora        6
cliente          6
mesa          3219
item            76
produto          6
quantidade       6
valor_unit       6
desconto         6
total_item       6
dtype: int64
In [8]:
## DROPPING THE ROWS THAT ALL COLUMN VALUES ARE NULL
#  https://www.youtube.com/watch?v=uDr67HBIPz8&t=818s

transactions = transactions.dropna(how='all')
In [9]:
# Delete the column "item"

del transactions["item"]
In [10]:
## Using isna() to select all rows with NaN in the column ('mesa') and the value "CONSUMIDOR" on the column "cliente" 
## and replace the "mesa" value with "M0"
## https://datatofish.com/rows-with-nan-pandas-dataframe/
## https://www.geeksforgeeks.org/how-to-replace-values-in-column-based-on-condition-in-pandas/

transactions.loc[(transactions["cliente"] == 'CONSUMIDOR') &  (transactions['mesa'].isna()), 'mesa']= 'M0'
In [11]:
# Checking if the changes were correct.

transactions.loc[transactions["mesa"] == 'M0']
Out[11]:
venda data_hora cliente mesa produto quantidade valor_unit desconto total_item
121 520.0 02/02/2022 16:57 CONSUMIDOR M0 BOMBOLONI NUTELLA 80G 2.0 12 0.0 24
122 520.0 02/02/2022 16:57 CONSUMIDOR M0 TARTELETE DE MORANGO 120G 1.0 18 0.0 18
123 520.0 02/02/2022 16:57 CONSUMIDOR M0 CROISSANT MUSSATRELA DE 1.0 25,5 0.0 25,5
894 856.0 11/02/2022 19:19 CONSUMIDOR M0 CROISSANT BRIE PARMA MEL DE 2.0 25,5 0.0 51
895 856.0 11/02/2022 19:19 CONSUMIDOR M0 VH SANTA HORTENSIA SAUVIG 1.0 59 0.0 59
In [12]:
# Assigning a value "ND" to the empty records in column "mesa"
# https://www.w3schools.com/python/pandas/pandas_cleaning_empty_cells.asp

transactions['mesa'].fillna("ND",inplace = True )
In [13]:
# Convert the "venda" variable from float64 type to integer type
# https://stackoverflow.com/questions/43956335/convert-float64-column-to-int64-in-pandas

transactions['venda'] = np.int64(transactions['venda'])
In [14]:
# Convert the "quantidade" variable from float64 type to integer type
# https://stackoverflow.com/questions/43956335/convert-float64-column-to-int64-in-pandas

transactions['quantidade'] = np.int64(transactions['quantidade'])
In [15]:
# change the decimal separator of the the variable "valor_unit" from "," to "."
# https://cursos.alura.com.br/forum/topico-transformar-pontos-por-virgulas-dentro-de-um-dataframe-121090
transactions['valor_unit'] = transactions['valor_unit'].str.replace(",",".")

# Convert the "valor_uni" variable from object type to float64 type
# https://www.statology.org/pandas-convert-object-to-float/

transactions['valor_unit'] = transactions['valor_unit'].astype(float)
In [16]:
# change the decimal delimiter of the the variable "total_item" from "," to "."
# https://cursos.alura.com.br/forum/topico-transformar-pontos-por-virgulas-dentro-de-um-dataframe-121090
transactions['total_item'] = transactions['total_item'].str.replace(",",".")

# Convert the "total_item" variable from object type to float64 type
# https://www.statology.org/pandas-convert-object-to-float/

transactions['total_item'] = transactions['total_item'].astype(float)
In [17]:
# Convert "data_hora" to datetime object
transactions['data_hora'] = pd.to_datetime(transactions['data_hora'], format='%d/%m/%Y %H:%M')

# Creating a horario column that is a copy of data_hora and coverting it to datetime and extracting only the day;
transactions['horario'] = (transactions['data_hora'])
transactions['horario'] = transactions['horario'].dt.date

#How to Solve Python ValueError: unconverted data remains
#read://https_researchdatapod.com/?url=https%3A%2F%2Fresearchdatapod.com%2Fhow-to-solve-python-valueerror-unconverted-data-remains%2F
In [18]:
transactions.head()
Out[18]:
venda data_hora cliente mesa produto quantidade valor_unit desconto total_item horario
0 460 2022-02-01 08:13:00 CONSUMIDOR M 1 SUCO DE LARANJA 400ML 1 14.5 0.0 14.5 2022-02-01
1 460 2022-02-01 08:13:00 CONSUMIDOR M 1 PAO SOURDOUGH OVOS 1 15.5 0.0 15.5 2022-02-01
2 461 2022-02-01 08:14:00 CONSUMIDOR M 2 SUCO DE LARANJA 400ML 1 14.5 0.0 14.5 2022-02-01
3 461 2022-02-01 08:14:00 CONSUMIDOR M 2 PAO SOURDOUGH OVOS 1 15.5 0.0 15.5 2022-02-01
4 465 2022-02-01 09:01:00 CONSUMIDOR M 1 QUICHE BACON 90G 2 19.5 0.0 39.0 2022-02-01
In [19]:
transactions.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 25992 entries, 0 to 25997
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   venda       25992 non-null  int64         
 1   data_hora   25992 non-null  datetime64[ns]
 2   cliente     25992 non-null  object        
 3   mesa        25992 non-null  object        
 4   produto     25992 non-null  object        
 5   quantidade  25992 non-null  int64         
 6   valor_unit  25992 non-null  float64       
 7   desconto    25992 non-null  float64       
 8   total_item  25992 non-null  float64       
 9   horario     25992 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(2), object(4)
memory usage: 2.2+ MB
In [20]:
# https://www.dataquest.io/blog/python-datetime-tutorial/


# add hora and minuto column; using class attributes .hour and .minute

transactions['hora'] = transactions.data_hora.dt.hour
transactions['minuto'] = transactions.data_hora.dt.minute

Creating a column name 'hora_minuto'¶

In [21]:
# https://gist.github.com/krisbolton/aef6179ecf906892565d326b979b3a5b
# https://www.youtube.com/watch?v=TbXaP56Omuc


transactions['hora_minuto']=pd.to_datetime(transactions['data_hora'])

transactions['hora_minuto'] = transactions['hora_minuto'].dt.time

Creating the column "data" baseadao em "data_hora"¶

In [22]:
transactions['data'] = transactions['data_hora'].dt.normalize()

Extracting only the Date from 'data' column¶

In [23]:
## https://datagy.io/pandas-extract-date-from-datetime/
transactions['data'] = transactions['data_hora'].dt.normalize()
In [24]:
# Definindo a função time_of_day
def time_of_day(hora):
    """
    Determina se a  compra foi feita pela manhã, tarde ou noite.
    """
    
    if hora < 12:
        
        return 'manhã'
    
    elif hora < 17:
        
        return 'tarde'
    
    else:
        
        return 'noite'
In [25]:
# Apply time of day function to 'hora' column

transactions['time_of_day'] = transactions.hora.apply(time_of_day)
In [26]:
# https://pt.stackoverflow.com/questions/508484/mostrar-dias-da-semana#:~:text=Como%20weekday()%20retorna%20um,fim%20de%20semana%20ou%20n%C3%A3o.


# setar locale para português para que os dias da semana retornem em portugês no código abaixo.
locale.setlocale(locale.LC_ALL, 'pt_BR.utf-8')
Out[26]:
'pt_BR.utf-8'
In [27]:
# Add a day of week variable and a second classifier of weekday or weekend

from datetime import date
import calendar

transactions['day_of_week'] = [calendar.day_name[i.weekday()] for i in transactions['horario']]
In [28]:
transactions['day_type'] = ['FDS' if (i == 'domingo') | (i == 'sábado') else 'DDS' for i in 
                                                                            transactions['day_of_week']]
In [29]:
transactions.rename(columns = {'day_of_week':'dia_da_semana'}, inplace = True)
In [30]:
transactions.rename(columns = {'time_of_day':'hora_do_dia'}, inplace = True)
In [31]:
transactions.rename(columns = {'day_type':'tipo_de_dia'}, inplace = True)
In [32]:
# Add an item count variable equal to one for future groupby operations

transactions['item_count'] = 1
In [33]:
transactions.head()
Out[33]:
venda data_hora cliente mesa produto quantidade valor_unit desconto total_item horario hora minuto hora_minuto data hora_do_dia dia_da_semana tipo_de_dia item_count
0 460 2022-02-01 08:13:00 CONSUMIDOR M 1 SUCO DE LARANJA 400ML 1 14.5 0.0 14.5 2022-02-01 8 13 08:13:00 2022-02-01 manhã terça DDS 1
1 460 2022-02-01 08:13:00 CONSUMIDOR M 1 PAO SOURDOUGH OVOS 1 15.5 0.0 15.5 2022-02-01 8 13 08:13:00 2022-02-01 manhã terça DDS 1
2 461 2022-02-01 08:14:00 CONSUMIDOR M 2 SUCO DE LARANJA 400ML 1 14.5 0.0 14.5 2022-02-01 8 14 08:14:00 2022-02-01 manhã terça DDS 1
3 461 2022-02-01 08:14:00 CONSUMIDOR M 2 PAO SOURDOUGH OVOS 1 15.5 0.0 15.5 2022-02-01 8 14 08:14:00 2022-02-01 manhã terça DDS 1
4 465 2022-02-01 09:01:00 CONSUMIDOR M 1 QUICHE BACON 90G 2 19.5 0.0 39.0 2022-02-01 9 1 09:01:00 2022-02-01 manhã terça DDS 1

Verificando divergência dos dados¶

In [34]:
# Verificando se o valor "total_item" é igual a: "quantidade" * "valor_unit"

transactions["total_item_verif"] = transactions['quantidade']* transactions['valor_unit']
In [35]:
# Continuando o item anterior
# https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/

transactions['confirm'] = np.where(transactions['total_item'] > transactions['total_item_verif'], True, False)

Creating a Column diference¶

In [36]:
transactions['diference']=(transactions['total_item'])-(transactions['total_item_verif'])
In [37]:
# continuando o item anterior
resumo = transactions.loc[transactions["confirm"] == True, ["total_item", "total_item_verif", 'diference']].sum()

print (resumo)
total_item          53878.3
total_item_verif    22219.1
diference           31659.2
dtype: float64
In [ ]:
 

Gerando um relatório dos dados divergentes¶

In [38]:
# How do I select specific rows and columns from a DataFrame?
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html#how-do-i-select-specific-rows-and-columns-from-a-dataframe
# https://www.geeksforgeeks.org/select-rows-columns-by-name-or-index-in-pandas-dataframe-using-loc-iloc/

ts2 = transactions.loc[transactions["confirm"] == True, ["venda", "data","produto", "quantidade", "valor_unit", "total_item", "total_item_verif"]]
In [39]:
ts2.head()
Out[39]:
venda data produto quantidade valor_unit total_item total_item_verif
7039 3571 2022-04-29 CROISSANT 1 10.0 25.5 10.0
7047 3575 2022-04-29 CROISSANT 1 10.0 28.9 10.0
7054 3582 2022-04-29 CROISSANT 1 10.0 25.5 10.0
7072 3599 2022-04-30 CROISSANT 1 10.0 19.5 10.0
7074 3600 2022-04-30 CROISSANT IFOOD 1 12.0 51.0 12.0

Criando um arquivo HTML dos dados divergentes usando Plotly ("ts2" dataframe)¶

/home/claudio/results/Dados_divergentes_vendas.html

/home/claudio/farina_in_natura001/CSV

GRÁFICO 01¶

In [40]:
fig_divergentes = go.Figure(data=[go.Table(
    header=dict(values=list(ts2.columns),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[ts2['venda'], ts2['data'], ts2['produto'], ts2['quantidade'], ts2['valor_unit'], ts2['total_item'], ts2['total_item_verif']],
               fill_color='lightcyan',
               align='left'))
])


fig_divergentes.update_layout(
    title={
        'text': "Dados divergentes - Valores de venda - (Fev 2022 - Out 22)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.iplot(fig_divergentes, filename='/home/claudio/results/grafico01 - Dados_divergentes_vendas.html')
In [41]:
# drop the 'confirm" and "total_item_verif" column

transactions = transactions.drop(['confirm'], axis=1)
transactions = transactions.drop(['total_item_verif'], axis=1)
transactions = transactions.drop (['diference'], axis=1)

Exploração dos Dados¶

In [42]:
# Find Unique Values of each item sold in the Column "produto"
# https://www.statology.org/pandas-unique-values-in-column/
# Pandas: get first 10 elements of a series
top_produtos = transactions.produto.value_counts()
print (top_produtos[:10])
CROISSANT                       2393
PASTEL DE NATA 60G              1339
CROISSANT  AMENDOAS 100G         742
FOLHADO MACA 80G                 719
SANDUICHE DE PEPERONI BRIE       596
COOKIES 80G                      559
CROISSANT IFOOD                  534
SOURDOUGH AZEITONA E ALECRIM     517
FOLHADO FRANGO 160G              500
BOMBOLONI DOCE DE LEITE 80G      487
Name: produto, dtype: int64

Converting a Series to a dataframe and changing the namee of the column¶

In [43]:
top_produtos_df = top_produtos.to_frame()
In [44]:
top_produtos_df = top_produtos_df.reset_index()
In [45]:
top_produtos_df.rename(columns = {'produto':'quantidade', 'index':'produto'}, inplace = True)
In [46]:
top_produtos_df.head()
Out[46]:
produto quantidade
0 CROISSANT 2393
1 PASTEL DE NATA 60G 1339
2 CROISSANT AMENDOAS 100G 742
3 FOLHADO MACA 80G 719
4 SANDUICHE DE PEPERONI BRIE 596

Criando um arquivo HTML usando Plotly¶

GRÁFICO 02¶

In [47]:
fig_top_produtos = go.Figure(data=[go.Table(
    header=dict(values=list(top_produtos_df.columns),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[top_produtos_df['produto'], top_produtos_df['quantidade'] ],
               fill_color='lightcyan',
               align='left'))
])


fig_top_produtos.update_layout(
    title={
        'text': "Produtos mais vendidos - (Fev 2022 - Out 22)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.iplot(fig_top_produtos, filename='/home/claudio/results/grafico02 - Top_products_2022.html')

Find the percentage of itens sold¶

In [48]:
# find the percentage each item is selling
top_produtos_perc = transactions.produto.value_counts(True)
print (top_produtos_perc[:10])
CROISSANT                       0.092067
PASTEL DE NATA 60G              0.051516
CROISSANT  AMENDOAS 100G        0.028547
FOLHADO MACA 80G                0.027662
SANDUICHE DE PEPERONI BRIE      0.022930
COOKIES 80G                     0.021507
CROISSANT IFOOD                 0.020545
SOURDOUGH AZEITONA E ALECRIM    0.019891
FOLHADO FRANGO 160G             0.019237
BOMBOLONI DOCE DE LEITE 80G     0.018737
Name: produto, dtype: float64

Converting to a Dataframe¶

In [49]:
top_produtos_perc_df = top_produtos_perc.to_frame().reset_index()
In [50]:
top_produtos_perc_df.rename(columns = {'produto':'proporção', 'index':'produto'}, inplace = True)
In [51]:
top_produtos_perc_df.head()
Out[51]:
produto proporção
0 CROISSANT 0.092067
1 PASTEL DE NATA 60G 0.051516
2 CROISSANT AMENDOAS 100G 0.028547
3 FOLHADO MACA 80G 0.027662
4 SANDUICHE DE PEPERONI BRIE 0.022930
In [52]:
top_produtos_perc_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212 entries, 0 to 211
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   produto    212 non-null    object 
 1   proporção  212 non-null    float64
dtypes: float64(1), object(1)
memory usage: 3.4+ KB

inserting a column Ranking based on the value of the column proporção¶

In [53]:
# https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/
In [54]:
# if a prduct has a percentage greater than 1,7%, its ranked, 
# if lower than this its aggregate with 'outros'

top_produtos_perc_df['ranking']= np.where (
    top_produtos_perc_df['proporção']> 0.017, 
    top_produtos_perc_df['produto'], 'outros' )

Criando um arquivo html usando Plotly¶

Gráfico 03¶

In [55]:
fig_top_produtos_perc = go.Figure(data=[go.Table(
    header=dict(values=list(top_produtos_perc_df.columns),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[top_produtos_perc_df['produto'], 
                       top_produtos_perc_df['proporção'],
                        top_produtos_perc_df['ranking'] ],
               fill_color='lightcyan',
               align='left'))
])


fig_top_produtos_perc.update_layout(
    title={
        'text': "Produtos mais vendidos proporcional - (Fev 2022 - Out 22)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.iplot(fig_top_produtos_perc, filename='/home/claudio/results/grafico03 - top_products_proporc_2022.html')

Inserir um gráfico de pizza da tabela acima¶

GRÁFICO 3A¶

In [56]:
fig_pie_3 = go.Figure(data=[go.Pie(labels=top_produtos_perc_df['ranking'], values=top_produtos_perc_df['proporção'])])

fig_pie_3.layout = dict(title="Percetual dos itens mais vendidos (Fev 2022 - Out 22) <br><sup></sup>", showlegend=True
)

py.iplot(fig_pie_3, filename='/home/claudio/results/grafico03A - Percetual_dos_itens_mais_vendidos.html')

Algumas métricas importantes¶

In [57]:
## Get unique names in the "produto" column using Series.unique() 
## https://sparkbyexamples.com/pandas/pandas-count-unique-values-in-column/
num_itens_vend = transactions.produto.unique().size

print('O número de diferentes itens vendido pela loja é:', num_itens_vend)
O número de diferentes itens vendido pela loja é: 212
In [58]:
# Look at the average number of items purchased...

avg_items = transactions.groupby(by='venda')['produto'].count().mean()

print('A média de itens por venda é: %.2f itens' %avg_items)
A média de itens por venda é: 2.79 itens
In [59]:
# ...and how that varies by time of day

single_transaction = pd.DataFrame(transactions.groupby(by='venda')['item_count'].sum())

single_transaction = single_transaction.merge(transactions[['hora','minuto','hora_do_dia','data', 'venda']]\
                                              .drop_duplicates('venda'), left_on=single_transaction.index, 
                                              right_on='venda', how='left', copy=False)

print('O número médio de itens vendidos por período do dia é: \n{0}'.format(
    single_transaction.groupby(by='hora_do_dia')['item_count'].mean()))
O número médio de itens vendidos por período do dia é: 
hora_do_dia
manhã    3.046419
noite    2.663510
tarde    2.783376
Name: item_count, dtype: float64

Creating a plot with the TOP itens sold per period of the day¶

GRÁFICO 04¶

In [60]:
# Find the frequency of each item purchased and by time of day

item_frequencies_all = transactions.produto.value_counts()
item_frequencies_morning = transactions[transactions.hora_do_dia == 'manhã'].produto.value_counts()
item_frequencies_afternoon = transactions[transactions.hora_do_dia == 'tarde'].produto.value_counts()
item_frequencies_evening = transactions[transactions.hora_do_dia == 'noite'].produto.value_counts()
In [61]:
# Plot the frequency of purchase for each item

# https://community.plotly.com/t/errors-accessing-plotly-api/33271
# ok, so I found a solution to my problem - when I let my program run through, the error message produced is:
#chart_studio.exceptions.PlotlyRequestError: Authentication credentials were not provided.
#I Googled this error and there were suggestions to run plotly in offline mode by replacing:



# Solving the problem of the 'plotly' online library with chart_studio
# https://plotly.com/python/getting-started-with-chart-studio/

##  Now I'm using Plotly Express (px) and Graphics Objects (go) # 13/03/2023

button_layer_1_height = 1.12
button_layer_2_height = 1.065

item_threshold_all = 225
item_threshold_other = 125

# https://plotly.com/python/bar-charts/#basic-bar-charts-with-plotlygraphobjects
# Customizing Individual Bar Colors

# Cores das barras todo dia
colors1 = ['seagreen',] * len(transactions)
colors1[13]= 'crimson' # destacar a venda de café

# Cores das barras manhã
colors2 = ['seagreen',] * len(transactions)
colors2[12]= 'crimson' # destacar venda de café

# Cores das barras tarde
colors3 = ['seagreen',] * len(transactions)
colors3[11]= 'crimson' # destacar venda de café

# Cores das barras noite
colors4 = ['seagreen',] * len(transactions)
colors4[18]= 'crimson' # destacar venda de café



# https://plotly.com/python/bar-charts/#basic-bar-charts-with-plotlygraphobjects - customizando as barras
trace_all = go.Bar(
            x=item_frequencies_all[item_frequencies_all > item_threshold_all].index,
            y=item_frequencies_all[item_frequencies_all > item_threshold_all].values,
            visible=True,
            marker_color=colors1,
            name='Dia Inteiro'
    )

trace_morning = go.Bar(
            x=item_frequencies_morning[item_frequencies_morning > item_threshold_other].index,
            y=item_frequencies_morning[item_frequencies_morning > item_threshold_other].values,
            visible=False,
            marker_color= colors2,
            name='Manhã'
    )

trace_afternoon = go.Bar(
            x=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].index,
            y=item_frequencies_afternoon[item_frequencies_afternoon > item_threshold_other].values,
            visible=False,
            marker_color=colors3,
            name='Tarde'
    )

trace_evening = go.Bar(
            x=item_frequencies_evening[item_frequencies_evening > item_threshold_other].index,
            y=item_frequencies_evening[item_frequencies_evening > item_threshold_other].values,
            visible=False,
            marker_color= colors4,
            name='Noite'
    )



data04 = [trace_all, trace_morning, trace_afternoon, trace_evening]
In [62]:
# Inserir um update _layout para colocar um subtitle
# https://stackoverflow.com/questions/58166002/how-to-add-caption-subtitle-using-plotly-method-in-python
In [63]:
updatemenus = list([
    
    dict(
         buttons=list([   
            dict(label = 'Dia inteiro',
                 method = 'update',
                 args = [{'visible': [True, False, False, False]},
                         {'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Dia inteiro - produtos com mais de 225 itens vendidos</sup>'}]),
             
            dict(label = 'Manhã',
                 method = 'update',
                 args = [{'visible': [False, True, False, False]},
                         {'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Manhã - produtos com mais de 125 itens vendidos</sup>'}]),
             
            dict(label = 'Tarde',
                 method = 'update',
                 args = [{'visible': [False, False, True, False]},
                         {'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Tarde - produtos com mais de 125 itens vendidos</sup>'}]),
             
            dict(label = 'Noite',
                 method = 'update',
                 args = [{'visible': [False, False, False, True]},
                         {'title': 'Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Noite - produtos com mais de 125 itens vendidos</sup>'}])
        ]),
        type='buttons',
        direction = 'right',
        pad = {'r': 10, 't': 10},
        showactive = True,
        x = 0.6, # range from -2 to 3
        xanchor = 'left',
        y = button_layer_1_height,
        yanchor = 'top' )
])
    
In [64]:
            
layout04 = dict(title="Os mais vendidos por período (Fev 2022 - Out 22) <br><sup>Dia inteiro -- produtos com mais de 225 itens vendidos</sup>", showlegend=False,
              updatemenus=updatemenus)

# to change the background color

layout04.update(
    plot_bgcolor='lightcyan', 
    xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
In [65]:
fig04 = dict(data=data04, layout=layout04)

py.iplot(fig04, filename='/home/claudio/results/grafico04 - Frequencia_vendas_periodo_dia.html')

Creating a plot to show the TOP itens sold by month (fevereiro 2022 -outubro 2022)¶

GRÁFICO 05¶

In [66]:
f_filter = (transactions['data']!='2022-06-23')#&(transactions['data']!='2022-07-30')
transactions[f_filter]

transactions_filtrada = transactions[f_filter]

Criar a váriavel mes baseada na variavel horario¶

In [67]:
transactions_filtrada['mes']=(transactions_filtrada['data'].dt.month)
/tmp/ipykernel_216/1953160630.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Creating the plot¶

In [68]:
# Find the frequency of each item purchased by month

item_frequencies_periodo = transactions_filtrada.produto.value_counts()
item_frequencies_jan = transactions_filtrada[transactions_filtrada.mes== 1].produto.value_counts()
item_frequencies_fev = transactions_filtrada[transactions_filtrada.mes== 2].produto.value_counts()
item_frequencies_mar = transactions_filtrada[transactions_filtrada.mes== 3].produto.value_counts()
item_frequencies_abr = transactions_filtrada[transactions_filtrada.mes== 4].produto.value_counts()
item_frequencies_mai = transactions_filtrada[transactions_filtrada.mes== 5].produto.value_counts()
item_frequencies_jun = transactions_filtrada[transactions_filtrada.mes== 6].produto.value_counts()
item_frequencies_jul = transactions_filtrada[transactions_filtrada.mes== 7].produto.value_counts()
item_frequencies_ago = transactions_filtrada[transactions_filtrada.mes== 8].produto.value_counts()
item_frequencies_set = transactions_filtrada[transactions_filtrada.mes== 9].produto.value_counts()
item_frequencies_out = transactions_filtrada[transactions_filtrada.mes== 10].produto.value_counts()
item_frequencies_nov = transactions_filtrada[transactions_filtrada.mes== 11].produto.value_counts()
item_frequencies_dez = transactions_filtrada[transactions_filtrada.mes== 12].produto.value_counts()

# Plot the frequency of purchase for each item by month

button_layer_1_height = 1.12
button_layer_2_height = 1.065

item_threshold_all = 225
item_threshold_other = 40

colors1 = ['seagreen',] * len(transactions)

trace_periodo = go.Bar(
            x=item_frequencies_periodo[item_frequencies_periodo > item_threshold_all].index,
            y=item_frequencies_periodo[item_frequencies_periodo > item_threshold_all].values, text=item_frequencies_periodo,
            visible=True,
            marker_color=colors1,
            name='Todo periodo'
    )

trace_jan = go.Bar(
            x=item_frequencies_jan[item_frequencies_jan > item_threshold_other].index,
            y=item_frequencies_jan[item_frequencies_jan > item_threshold_other].values,text=item_frequencies_jan,
            visible=False,
            marker_color=colors1,
            name='Janeiro'
    )

trace_fev = go.Bar(
            x=item_frequencies_fev[item_frequencies_fev > item_threshold_other].index,
            y=item_frequencies_fev[item_frequencies_fev > item_threshold_other].values,text=item_frequencies_fev,
            visible=False,
            marker_color=colors1,
            name='Fevereiro'
    )

trace_mar = go.Bar(
            x=item_frequencies_mar[item_frequencies_mar > item_threshold_other].index,
            y=item_frequencies_mar[item_frequencies_mar > item_threshold_other].values,text=item_frequencies_mar,
            visible=False,
            marker_color=colors1,
            name='Março'
    )
trace_abr = go.Bar(
            x=item_frequencies_abr[item_frequencies_abr > item_threshold_other].index,
            y=item_frequencies_abr[item_frequencies_abr > item_threshold_other].values,text=item_frequencies_abr,
            visible=False,
            marker_color=colors1,
            name='Abril'
)
trace_mai = go.Bar(
            x=item_frequencies_mai[item_frequencies_mai > item_threshold_other].index,
            y=item_frequencies_mai[item_frequencies_mai > item_threshold_other].values,text=item_frequencies_mai,
            visible=False,
            marker_color=colors1,
            name='Maio'
)
trace_jun = go.Bar(
            x=item_frequencies_jun[item_frequencies_jun > item_threshold_other].index,
            y=item_frequencies_jun[item_frequencies_jun > item_threshold_other].values,text=item_frequencies_jun,
            visible=False,
            marker_color=colors1,
            name='Junho'
)
trace_jul = go.Bar(
            x=item_frequencies_jul[item_frequencies_jul > 25].index,
            y=item_frequencies_jul[item_frequencies_jul > 25].values,text=item_frequencies_jul,
            visible=False,
            marker_color=colors1,
            name='Julho'
)
trace_ago = go.Bar(
            x=item_frequencies_ago[item_frequencies_ago > item_threshold_other].index,
            y=item_frequencies_ago[item_frequencies_ago > item_threshold_other].values,text=item_frequencies_ago,
            visible=False,
            marker_color=colors1,
            name='Agosto'
)
trace_set = go.Bar(
            x=item_frequencies_set[item_frequencies_set > item_threshold_other].index,
            y=item_frequencies_set[item_frequencies_set > item_threshold_other].values,text=item_frequencies_set,
            visible=False,
            marker_color=colors1,
            name='Setembro'
)
trace_out = go.Bar(
            x=item_frequencies_out[item_frequencies_out > item_threshold_other].index,
            y=item_frequencies_out[item_frequencies_out > item_threshold_other].values,text=item_frequencies_out,
            visible=False,
            marker_color=colors1,
            name='Outubro'
)
trace_nov = go.Bar(
            x=item_frequencies_nov[item_frequencies_nov > item_threshold_other].index,
            y=item_frequencies_nov[item_frequencies_nov > item_threshold_other].values,text=item_frequencies_nov,
            visible=False,
            marker_color=colors1,
            name='Novembro'
)
trace_dez = go.Bar(
            x=item_frequencies_dez[item_frequencies_dez > item_threshold_other].index,
            y=item_frequencies_dez[item_frequencies_dez > item_threshold_other].values,text=item_frequencies_dez,
            visible=False,
            marker_color=colors1,
            name='Dezembro'
)




data_mes = [trace_periodo, trace_jan, trace_fev, trace_mar, trace_abr, trace_mai, trace_jun, trace_jul, trace_ago, trace_set, trace_out, trace_nov, trace_dez]


updatemenus2 = list([

    dict
        (
         buttons=list([   
            
                dict(label = 'Todo Ano',
                 method = 'update',
                 args = [{'visible': [True, False, False, False, False, False, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Todo Ano</sup>'}]),
             
                dict(label = 'Janeiro',
                 method = 'update',
                 args = [{'visible': [False, True, False, False, False, False, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Janeiro</sup>'}]),
             
                dict(label = 'Fevereiro',
                 method = 'update',
                 args = [{'visible': [False, False, True, False, False, False, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Fevereiro</sup>'}]),
             
                dict(label = 'Março',
                 method = 'update',
                 args = [{'visible': [False, False, False, True, False, False, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Março</sup>'}]),
            
                dict(label = 'Abril',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, True, False, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Abril</sup>'}]),

                dict(label = 'Maio',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, True, False, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Maio</sup>'}]),

                dict(label = 'Junho',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, True, False, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Junho</sup>'}]),

                dict(label = 'Julho',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, True, False, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Julho</sup>'}]),
        
                dict(label = 'Agosto',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, False, True, False, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Agosto</sup>'}]),

                dict(label = 'Setembro',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, False, False, True, False, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Setembro</sup>'}]),

                dict(label = 'Outubro',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, False, False, False, True, False, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Outubro</sup>'}]),

                dict(label = 'Novembro',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, False, False, False, False, True, False]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Novembro</sup>'}]),
                        
                dict(label = 'Dezembro',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, False, False, False, False, False, True]},
                         {'title': 'Vendas Farina por item - ano 2022 <br><sup>Dezembro</sup>'}])
         ]),
        type='buttons',
        direction = 'right',
        pad = {'r': 10, 't': 10},
        showactive = True,
        x = 0.3,
        xanchor = 'left',
        y = button_layer_1_height,
        yanchor = 'top' 
        )
])
                


layout_mes = dict(title='Vendas Farina por item - ano 2022 <br><sup>Todo Ano</sup>', showlegend=False, updatemenus=updatemenus2)

# to change the background color

layout_mes.update(
    plot_bgcolor='lightcyan', 
    xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
fig = dict(data=data_mes, layout=layout_mes)
py.iplot(fig, filename='/home/claudio/results/grafico05 - Frequencia_vendas_itens_mensal.html')

Creating a table and an animated frequencie line¶

In [69]:
# Sales trend by date
# https://www.youtube.com/watch?v=Xjm7fOw6gjY
# use reset index for transforming the data in a variable not a index

grouped_by_date = pd.DataFrame(transactions.groupby(by='horario')['item_count'].sum().reset_index())
In [70]:
grouped_by_date.head()
Out[70]:
horario item_count
0 2022-02-01 74
1 2022-02-02 88
2 2022-02-03 81
3 2022-02-04 110
4 2022-02-05 112

GRAFICO 06¶

In [71]:
fig_grouped_by_date = go.Figure(data=[go.Table(
    header=dict(values=list(['Data', 'Quantidade']),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[grouped_by_date['horario'], 
                       grouped_by_date['item_count'],],
               fill_color='lightcyan',
               align='left'))
])

fig_grouped_by_date.update_layout (dict(title={
        'text': "Venda de itens por dia - (Fev 2022 - Out 22)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'}), showlegend=True)


py.iplot(fig_grouped_by_date, filename='/home/claudio/results/grafico06 - tabela_vendas_por_dia_v2.html')

GRAFICO 07¶

In [72]:
vendas = px.line(grouped_by_date, x = 'horario', y = 'item_count', title = 'Itens vendidos por dia - Ano 2022', labels= {'item_count' : 'Quantidade','horario': 'Data'})

# update line color
vendas.update_traces(line_color='seagreen')

# update backgournd
vendas.update_layout(
    plot_bgcolor='lightcyan', 
    xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)

# adicionando anaotação
# https://plotly.com/python/styling-plotly-express/
vendas.add_annotation( # add a text callout with arrow
    text="Vendas muito acima do padrão, dia 23 de Junho", x="2022-06-23", y=2300, arrowhead=1, showarrow=True
)

vendas.add_annotation( # add a text callout with arrow
    text="Não foram informadas vendas <br /> entre 12 de Jul e 29 de Jul", x="2022-07-15", y=150, arrowhead=1, showarrow=True)


py.iplot(vendas, filename='/home/claudio/results/grafico07 - Vendas_por_dia_linha.html')

Grafico 08¶

In [73]:
# ajustando o gráfio para retirar dois outliers (490 em 2022-07-30 , 2303 em 2022-06-23 ), utilizando-se de filtro com a função 'where'
indice_atip = np.where((grouped_by_date['item_count'] < 400))
linha=grouped_by_date.loc[indice_atip]
In [74]:
# https://towardsdatascience.com/line-chart-animation-with-plotly-on-jupyter-e19c738dc882



#linha=grouped_by_date: usar se precisar demonstrar os dois outliers

trace1 = go.Scatter(x=linha['horario'][:2],
                    y=linha['item_count'][:2],
                    mode='lines', 
                    line=dict(color='seagreen', width=1.5), # mudamos a cor da linha
                    fill = 'tozeroy',
                   fillcolor = 'lightblue'
)
frames = [dict(data= [dict(type='scatter',
                           x=linha['horario'][:k+1],
                           y=linha['item_count'][:k+1]),
                     ],
               traces= [0],  
              )for k  in  range(1, len(linha)-1)]
layout = go.Layout(width=1180,
                   height=750,
                   showlegend=False,
                   hovermode='closest',
                   updatemenus=[
                        dict(
                            type='buttons', showactive=False,
                            y=1.08,
                            x=0.12,
                            xanchor='center',
                            yanchor='auto',
                            pad=dict(t=0, r=10),
                            buttons=[dict(label='Play',
                            method='animate',
                            args=[None, 
                                  dict(frame=dict(duration=30, 
                                                  redraw=False),
                                                  transition=dict(duration=0),
                                                  fromcurrent=True,
                                                  mode='immediate')]
                            )]
                        ),
                        
                    ]              
                  )
layout.update(xaxis =dict(range=['2022-02-01', '2022-10-31'], autorange=False),
              yaxis =dict(range=[0, 250], autorange=False), xaxis_title = "Data", yaxis_title = "Quantidade"); # ajustar o range se precisar colocar os outliers

fig_vendas = go.Figure(data=[trace1], frames=frames, layout=layout)

# Adicionamos Título
fig_vendas.update_layout (dict(title={
        'text': "Vendas por dia desconsiderando os outliers - (Fev 2022 - Out 22)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'}), showlegend=False, )

# mudamos o background
fig_vendas.update_layout(
    plot_bgcolor='lightcyan', 
    xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
# adicionamos anotação
fig_vendas.add_annotation( # add a text callout with arrow
    text="Retriramos os dias 23 de Junho e <br /> 30 de Julho", x="2022-07-15", y=200, arrowhead=1, showarrow=False)


py.iplot(fig_vendas, filename='/home/claudio/results/grafico08 - Vendas_por_dia_farina_interativo.html')

Investigando dados atipicos¶

GRÁFICO 09¶

In [75]:
# Vendas atípicas no dia 23 de Junho de 2022 entre 16:00 e 17:00

df_invest = pd.DataFrame(transactions).query("data == 'Jun 23, 2022'").groupby(by='hora')['item_count'].sum().reset_index()
In [76]:
df_invest
Out[76]:
hora item_count
0 7 8
1 9 5
2 10 5
3 11 7
4 12 4
5 13 8
6 14 5
7 15 3
8 16 2254
9 17 3
10 18 1
In [77]:
fig_atipicos = go.Figure(data=[go.Table(
    header=dict(values=list({'Hora do dia', 'Quantidade'}),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[df_invest['hora'], df_invest['item_count']],
               fill_color='lightcyan',
               align='left'))
])


fig_atipicos.update_layout(
    title={
        'text': "Investigando dados atípicos - dia 23 Junho 2022",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.iplot(fig_atipicos, filename='/home/claudio/results/grafico09 - investiga_tabela.html')

GRÁFICO 10¶

In [78]:
investiga_dados = px.line(df_invest, x = 'hora', y = 'item_count', title = 'Investigando dados atipicos - <br />Vendas no dia 23 de Junho 2022 por hora do dia', 
                          labels= {'item_count' : 'Quantidade','hora': 'Hora do dia'}, log_y= True)

# update line color
investiga_dados.update_traces(line_color='seagreen')

# update backgournd
investiga_dados.update_layout(
    plot_bgcolor='lightcyan', 
    xaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)

# adicionando anaotação
# https://plotly.com/python/styling-plotly-express/
investiga_dados.add_annotation( # add a text callout with arrow
    text="Vendas entre <br />16 e 17 horas", x=15.5, y=2.9, arrowhead=4, showarrow=True
)


py.iplot(investiga_dados, filename='/home/claudio/results/grafico10 - investiga_graf.html')

Encontrados dados atípicos nos dias 23 de Junho de 2022 e 30 de Julho de 2022, portanto uma filtragem desses dados se faz necessária¶

In [79]:
f_filter = (transactions['data']!='2022-06-23')&(transactions['data']!='2022-07-30')
transactions[f_filter]

transactions_filtrada = transactions[f_filter]

Mais informações relevantes¶

Vendas dias de semana (DDS) vs Final de semana (FDS)¶

In [80]:
dds_vendas2= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'DDS'].groupby(by='horario')['item_count'].sum()).reset_index()
fds_vendas2= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'FDS'].groupby(by='horario')['item_count'].sum()).reset_index()
In [81]:
dds_vendas2.head()
Out[81]:
horario item_count
0 2022-02-01 74
1 2022-02-02 88
2 2022-02-03 81
3 2022-02-04 110
4 2022-02-07 61
In [82]:
fds_vendas2.head()
Out[82]:
horario item_count
0 2022-02-05 112
1 2022-02-06 121
2 2022-02-12 108
3 2022-02-13 186
4 2022-02-19 107

Vendas dia de semana(DDS)¶

In [83]:
print('O menor número de vendas em um dia de semana foi: \n{0}'.format(
    dds_vendas2.item_count.min()))
print('O maior número de vendas em um dia de semana foi: \n{0}'.format(
    dds_vendas2.item_count.max()))
print('O número médio de vendas em um dia de semana é: \n{0}'.format(
    dds_vendas2.item_count.mean()))
O menor número de vendas em um dia de semana foi: 
1
O maior número de vendas em um dia de semana foi: 
241
O número médio de vendas em um dia de semana é: 
75.81005586592178

Vendas final de semana (FDS)¶

In [84]:
print('O menor número de vendas em um dia de fim-de-semana foi: \n{0}'.format(
    fds_vendas2.item_count.min()))
print('O  maior número de vendas em um dia fim-de-semana foi: \n{0}'.format(
    fds_vendas2.item_count.max()))
print('O número médio de vendas em um dia fim-de-semana é: \n{0}'.format(
    fds_vendas2.item_count.mean()))
O menor número de vendas em um dia de fim-de-semana foi: 
65
O  maior número de vendas em um dia fim-de-semana foi: 
227
O número médio de vendas em um dia fim-de-semana é: 
133.73611111111111

Número de itens médios vendidos por cada pedido¶

In [85]:
single_transaction = pd.DataFrame(transactions_filtrada.groupby(by='venda')['item_count'].sum())

single_transaction = single_transaction.merge(transactions_filtrada[['tipo_de_dia','horario', 'venda']]\
                                              .drop_duplicates('venda'), left_on=single_transaction.index, 
                                              right_on='venda', how='left', copy=False)

print('O número médio de itens em cada venda por tipo de dia é: \n{0}'.format(
    single_transaction.groupby(by='tipo_de_dia')['item_count'].mean()))
O número médio de itens em cada venda por tipo de dia é: 
tipo_de_dia
DDS    2.650391
FDS    2.836230
Name: item_count, dtype: float64

Itens vendidos por tipo de dia¶

Grafico 10A¶

In [86]:
# Histogram of items sold by type of day

trace_dds = go.Histogram(
    x=dds_vendas2.item_count,
    opacity=0.75,
    name = 'Dia de Semana'
)

trace_fds = go.Histogram(
    x=fds_vendas2.item_count,
    opacity=0.65,
    name = 'Fim-de-semana',
    nbinsx = 16
)

data_10A = [trace_dds, trace_fds]
layout_10A = go.Layout( title = 'Distribuição das vendas por dias de semana/ finais de semana',
                   xaxis=dict(title='Número de Itens Vendidos'),
                   yaxis=dict(title='Frequencia de vendas'),
                   barmode='overlay'
                   )

fig_10A = go.Figure(data=data_10A, layout=layout_10A)

py.iplot(fig_10A, filename='/home/claudio/results/grafico10A - tipo-de_dia_itens_histograma.html')
In [87]:
# Sales trend by date
# https://www.youtube.com/watch?v=Xjm7fOw6gjY
# use reset index for transforming the data in a variable not a index

# Group by(horario), adding one more column (tipo_de_dia)
# https://www.easytweaks.com/pandas-groupby-to-dataframe/
tipo_de_dia_df= pd.DataFrame(transactions_filtrada.groupby(by=['horario','tipo_de_dia'])['item_count'].sum().reset_index())
In [88]:
tipo_de_dia_df.head()
Out[88]:
horario tipo_de_dia item_count
0 2022-02-01 DDS 74
1 2022-02-02 DDS 88
2 2022-02-03 DDS 81
3 2022-02-04 DDS 110
4 2022-02-05 FDS 112

Grafico 10B¶

In [89]:
# Line chart of items sold by type of day

# Create traces
tipo_dia_graf = go.Figure()
tipo_dia_graf.add_trace(go.Scatter(x=fds_vendas2['horario'], y=fds_vendas2['item_count'],
                    mode='lines',
                    name='Fim de Semana',
                    line=dict(color='darkblue', width=1.5)))
tipo_dia_graf.add_trace(go.Scatter(x=dds_vendas2['horario'], y=dds_vendas2['item_count'],
                    mode='lines',
                    name='Dia de Semana',
                    line=dict(color='seagreen', width=1.5))) # mudamos a cor da linha

tipo_dia_graf.update_layout ( title = {'text' : 'Venda de itens <br />Dias de semana X Finais de semana - 2022 (Fev-Out)'},
                   xaxis=dict(title='Data', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
                   yaxis=dict(title='Itens vendidos', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
                   plot_bgcolor='lightcyan', # update Background
    
)
py.iplot(tipo_dia_graf, filename='/home/claudio/results/grafico10B - tipo-de_dia_2_linhas.html')

Faturamento por tipo de dia (DDS x FDS)¶

In [90]:
dds_fatu= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'DDS'].groupby(by='horario')['total_item'].sum()).reset_index()
fds_fatu= pd.DataFrame(transactions_filtrada[transactions_filtrada.tipo_de_dia == 'FDS'].groupby(by='horario')['total_item'].sum()).reset_index()
In [91]:
dds_fatu.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179 entries, 0 to 178
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   horario     179 non-null    object 
 1   total_item  179 non-null    float64
dtypes: float64(1), object(1)
memory usage: 2.9+ KB

Grafico 11¶

In [92]:
# Histograma

trace_dds = go.Histogram(
    x=dds_fatu[dds_fatu.total_item  < 7900].total_item,
    opacity=0.75,
    name = 'Dia de Semana'
)

trace_fds = go.Histogram(
    x=fds_fatu[fds_fatu.total_item < 7900].total_item,
    opacity=0.65,
    name = 'Fim-de-semana',
    nbinsx = 16
)

data_11 = [trace_dds, trace_fds]
layout_11 = go.Layout( title = 'Distribuição do faturamento por dias de semana/ finais de semana',
                   xaxis=dict(title='Faturamento por dia'),
                   yaxis=dict(title='Frequencia de faturamento diário'),
                   barmode='overlay'
                   )

fig_11 = go.Figure(data=data_11, layout=layout_11)

py.iplot(fig_11, filename='/home/claudio/results/grafico11 - tipo-de_dia_faturamento_histograma.html')

Gráfico 11B¶

In [93]:
# Line chart of items sold by type of day

# Create traces
tipo_dia_fatu_graf = go.Figure()
tipo_dia_fatu_graf.add_trace(go.Scatter(x=fds_fatu['horario'], y=fds_fatu['total_item'],
                    mode='lines',
                    name='Fim de Semana',
                    line=dict(color='darkblue', width=1.5)))
tipo_dia_fatu_graf.add_trace(go.Scatter(x=dds_fatu['horario'], y=dds_fatu['total_item'],
                    mode='lines',
                    name='Dia de Semana',
                    line=dict(color='seagreen', width=1.5))) # mudamos a cor da linha

tipo_dia_fatu_graf.update_layout ( title = {'text' : 'Faturamento <br />Dias de semana X Finais de semana - 2022 (Fev-Out)'},
                   xaxis=dict(title='Data', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
                   yaxis=dict(title='Faturamento', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
                   plot_bgcolor='lightcyan', # update Background
    
)
py.iplot(tipo_dia_fatu_graf, filename='/home/claudio/results/grafico11B - faturamento_tipo_de_dia_2_linhas.html')

Número de itens vendidos por dia da semana¶

In [94]:
grouped_by_date_filtrada = pd.DataFrame(transactions_filtrada.groupby(by='horario')['item_count'].sum()).reset_index()
In [95]:
grouped_by_date_filtrada.head()
Out[95]:
horario item_count
0 2022-02-01 74
1 2022-02-02 88
2 2022-02-03 81
3 2022-02-04 110
4 2022-02-05 112

GAFICO 12¶

In [96]:
trace_todos_itens = go.Figure(data=[go.Box(y= grouped_by_date_filtrada['item_count'], boxpoints= "outliers", jitter = 0.5, whiskerwidth= 1, notchwidth=0.5,  fillcolor= 'seagreen', line=dict(color="seagreen" )

            )])

## adicionei uma anotação ao boxplot - https://community.plotly.com/t/how-to-save-labels-on-plotly-box-plot-instead-of-it-disappearing-when-not-hovering-on-it/54637

trace_todos_itens.add_annotation(x=0, y=250, #Min
            text="Dias com as vendas acima do normal - Outliers",
            font=dict(size=16),
            showarrow=True, arrowwidth= 2, arrowhead= 4
)
trace_todos_itens.update_layout(title= {'text': "Distribuição dos dias por quantidade de itens vendidos - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        xaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
                   yaxis=dict(title='', mirror=True, showline=True, linecolor='black', gridcolor='lightblue'),
                   plot_bgcolor='lightcyan') # update Background

##### adicionar comentário ao boxplot



box_itens_por_dia= go.Figure(data = trace_todos_itens, layout = layout)

py.iplot(box_itens_por_dia, filename = '/home/claudio/results/grafico12 - Boxplot_Vendas_itens_ranking.html')
In [97]:
# Find the number of itens sold per day of week

agregado_vendas_por_dia_2 = transactions_filtrada.groupby(by=['dia_da_semana','horario'])['item_count'].sum()

agregado_vendas_por_dia_2 = pd.DataFrame(agregado_vendas_por_dia_2).reset_index()
In [98]:
agregado_vendas_por_dia_2.head(10)
Out[98]:
dia_da_semana horario item_count
0 domingo 2022-02-06 121
1 domingo 2022-02-13 186
2 domingo 2022-02-20 156
3 domingo 2022-02-27 86
4 domingo 2022-03-06 126
5 domingo 2022-03-13 145
6 domingo 2022-03-20 140
7 domingo 2022-03-27 121
8 domingo 2022-04-03 93
9 domingo 2022-04-10 77
In [99]:
agregado_vendas_por_dia_2 = agregado_vendas_por_dia_2[~agregado_vendas_por_dia_2.index.duplicated(keep='first')]

dias = ['segunda','terça','quarta','quinta','sexta','sábado','domingo']

vendas_por_dia = []

for dia in dias:
    
    df_dias= agregado_vendas_por_dia_2[agregado_vendas_por_dia_2.dia_da_semana == dia]
    
    vendas_por_dia.append(list(df_dias['item_count']))
    

Grafico 13¶

Plotly Express (px)¶

In [183]:
vendas_por_dia_item_graf = px.box(agregado_vendas_por_dia_2, y = 'item_count', x = 'dia_da_semana', color='dia_da_semana', labels={'item_count': 'Número de Itens Vendidos','dia_da_semana':'Dia da Semana'}, hover_data = [agregado_vendas_por_dia_2.item_count], points="all",
category_orders= {"dia_da_semana": ['sábado','domingo','segunda','terça','quarta','quinta','sexta']})

vendas_por_dia_item_graf.update_layout(title= {'text': "Distribuição das vendas por itens e dia da semana - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.plot(vendas_por_dia_item_graf, filename='/home/claudio/results/grafico13 - Vendas_agregadas_itens_dias_semana_PX.html')
Out[183]:
'/home/claudio/results/grafico13 - Vendas_agregadas_itens_dias_semana_PX.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001
/usr/share/atom/atom: bad option: --pid=1670

Graphic Objects (go)¶

In [180]:
traces = []

for i, j in enumerate(dias):
    
    trace = go.Box(
    y=vendas_por_dia[i],
    name = j
    )
    traces.append(trace)

data_13 = traces

layout_13 = go.Layout(
                   title={
        'text': "Distribuição das vendas por dia da semana - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
                 
                  xaxis=dict(title='Dia da Semana', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
                   yaxis=dict(title='Número de Itens Vendidos', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
                   plot_bgcolor='lightcyan', # update Background
                showlegend= False)

fig_13 = go.Figure(data = data_13, layout = layout_13)

py.iplot(fig_13, filename='/home/claudio/results/grafico13 - Vendas_por_dia_da_semana_BoxplotV2.html')

Horário de vendas dos itens por dia da semana¶

In [101]:
dia_hora_df = transactions_filtrada[[ 'hora_minuto','dia_da_semana', 'hora', 'item_count']]

Modo Simples¶

Grafico 14¶

In [102]:
vendas_por_dia_hora_graf = px.box(dia_hora_df, y = 'hora', x = 'dia_da_semana', color='dia_da_semana', labels={'hora': 'Hora','dia_da_semana':'Dia da Semana'}, hover_data = [dia_hora_df.hora_minuto], points="all", 
category_orders= {"dia_da_semana": ['sábado','domingo','segunda','terça','quarta','quinta','sexta']})

vendas_por_dia_hora_graf.update_layout(title= {'text': "Distribuição das vendas por hora e dia da semana - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.iplot(vendas_por_dia_hora_graf, filename='/home/claudio/results/grafico14 - Vendas_agregadas_Hora_dias_semana.html')
In [103]:
# https://stackoverflow.com/questions/59815797/how-to-save-plotly-express-plot-into-a-html-or-static-image-file
# fig.write_html(r"C:\Users\ctobr\OneDrive\DATA_Science\1_farina_in_natura\farina_in_natura00\Vendas_por_dia_Boxplot_simples2.html")

Modo Complexo - Graphic Objects¶

Distribuição de vendas por hora do dia¶

GRAFICO 15¶

In [104]:
trace_todos = go.Figure(data=[go.Box(x= dia_hora_df['hora'], boxpoints= "outliers", jitter = 0.5, whiskerwidth= 1, notchwidth=0.5, name= "Diariamente", fillcolor= 'seagreen', line=dict(color="seagreen" )
)])

## adicionei uma anotação ao boxplot - https://community.plotly.com/t/how-to-save-labels-on-plotly-box-plot-instead-of-it-disappearing-when-not-hovering-on-it/54637

trace_todos.add_annotation(x=8, y=0.05, #Min
            text="25% das vendas do dia",
            font=dict(size=16),
            showarrow=True, arrowwidth= 2, arrowhead= 4
            )
trace_todos.add_annotation(x=14, y=0.25, #Min
            text="25% das vendas do dia ",
            font=dict(size=16),
            showarrow=True, arrowwidth= 2, arrowhead= 4
            )
trace_todos.add_annotation(x=17.25, y=0.25, #Min
            text="25% das vendas do dia ",
            font=dict(size=16),
            showarrow=True, arrowwidth= 2, arrowhead= 4
            )
trace_todos.add_annotation(x=20, y=0.05, #Min
            text="25% das vendas do dia ",
            font=dict(size=16),
            showarrow=True, arrowwidth= 2, arrowhead= 4
            )
trace_todos.update_layout(title= {'text': "Distribuição das vendas por hora do dia - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        xaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
                   yaxis=dict(title='', mirror=True, showline=True, linecolor='black', gridcolor='lightcyan'),
                   plot_bgcolor='lightcyan') # update Background

##### adicionar comentário ao boxplot



todos_dias_graf = go.Figure(data = trace_todos, layout = layout)

py.iplot(todos_dias_graf, filename = '/home/claudio/results/grafico15 - Boxplot_Vendas_agregadas_hora_GO.html')

How to use Graphics Objects¶

In [105]:
#  https://www.youtube.com/watch?v=AwtQ7vJg0-o

Distribuição de vendas por dia da semana e hora do dia¶

In [106]:
dom = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'domingo']
seg = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'segunda']
ter = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'terça']
qua = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'quarta']
qui = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'quinta']
sex = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'sexta']
sab = dia_hora_df.hora[dia_hora_df['dia_da_semana']== 'sábado']
In [107]:
trace0 = go.Box(
    y= dom, boxpoints= "outliers", jitter = 0.5,
     name = 'Domingo', whiskerwidth= 1, notchwidth=0.5
)

trace1 = go.Box(
    y= seg,  boxpoints = 'outliers', jitter = 0.5,
    name = 'Segunda',  whiskerwidth= 1, notchwidth=0.5
)

trace2 = go.Box(
    y= ter, boxpoints = 'outliers', jitter = 0.5,
    name = 'Terça', whiskerwidth= 1, notchwidth=0.5
)

trace3 = go.Box(
    y= qua, boxpoints = 'outliers', jitter = 0.5,
    name = 'Quarta', whiskerwidth= 1, notchwidth=0.5
)

trace4 = go.Box(
    y= qui,boxpoints = 'outliers', jitter = 0.5,
    name = 'Quinta', whiskerwidth= 1, notchwidth=0.5
)

trace5 = go.Box(
    y= sex,boxpoints = 'outliers', jitter = 0.5,
    name = 'Sexta', whiskerwidth= 1, notchwidth=0.5
)

trace6 = go.Box(
    y= sab,boxpoints = 'outliers', jitter = 0.5,
    name = 'Sábado', whiskerwidth= 1, notchwidth=0.5
)

Grafico 16¶

In [108]:
data_16 = [trace6, trace0, trace1, trace2, trace3, trace4, trace5]
layout_16 = go.Layout(title={'text': "Distribuição das vendas por dia da semana e hora do dia - 2022 (Fev-Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        xaxis=dict(title='Dia da Semana', mirror=True, showline=True, linecolor='black', gridcolor='lightcyan'),
                   yaxis=dict(title='Hora do dia', mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'),
                   plot_bgcolor='lightcyan', showlegend= False,
                   ) # update Background




fig_16 = go.Figure(data = data_16, layout = layout_16)
In [109]:
py.iplot(fig_16, filename='/home/claudio/results/grafico16 - Vendas_hora_por_dia_Boxplot_GO.html')

Grafico 16B¶

Detalhamento do porcentual das vendas pelo periodo do dia¶

criando uma coluna no dataframe "transactions_filtrada_2"¶

In [110]:
# Criar o percentual por hora de vendas com filtro por dia da semana

vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(True)
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(True)
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(True)
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(True)
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(True)
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(True)
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(True)
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(True)
In [111]:
# Pandas: How to Represent value_counts as Percentage
# https://www.statology.org/pandas-value_counts-percentage/


vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(normalize=True).mul(100).round(2).astype(str) + '%'
In [112]:
# Pandas: How to Represent value_counts as Percentage
# https://www.statology.org/pandas-value_counts-percentage/


vendas_por_hora_perc_all = transactions_filtrada.hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_dom = transactions_filtrada[transactions_filtrada.dia_da_semana=="domingo"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_seg = transactions_filtrada[transactions_filtrada.dia_da_semana=="segunda"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_ter = transactions_filtrada[transactions_filtrada.dia_da_semana=="terça"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_qua = transactions_filtrada[transactions_filtrada.dia_da_semana=="quarta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_qui = transactions_filtrada[transactions_filtrada.dia_da_semana=="quinta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_sex = transactions_filtrada[transactions_filtrada.dia_da_semana=="sexta"].hora.value_counts(normalize=True).mul(100).round(2)
vendas_por_hora_perc_sab = transactions_filtrada[transactions_filtrada.dia_da_semana=="sábado"].hora.value_counts(normalize=True).mul(100).round(2)
In [113]:
# Plotar o percentual em grafico de barras

button_layer_1_height = 1.12
button_layer_2_height = 1.065



colors2 = ['seagreen',]

trace_semana= go.Bar(
            x=vendas_por_hora_perc_all.index,
            y=vendas_por_hora_perc_all.values,
            text= vendas_por_hora_perc_all, 
            visible=True,
            marker_color=colors2,
            name='Semana'
    )

trace_dom = go.Bar(
            x=vendas_por_hora_perc_dom.index,
            y=vendas_por_hora_perc_dom.values,
            text= vendas_por_hora_perc_dom,
            visible=False,
            marker_color=colors2,
            name='Domingo'
    )

trace_seg = go.Bar(
            x=vendas_por_hora_perc_seg.index,
            y=vendas_por_hora_perc_seg.values,
            text= vendas_por_hora_perc_seg,
            visible=False,
            marker_color=colors2,
            name='Segunda'
    )

trace_ter = go.Bar(
            x=vendas_por_hora_perc_ter.index,
            y=vendas_por_hora_perc_ter.values,
            text = vendas_por_hora_perc_ter,
            visible=False,
            marker_color=colors2,
            name='Terça'
    )
trace_qua = go.Bar(
            x=vendas_por_hora_perc_qua.index,
            y=vendas_por_hora_perc_qua.values,
            text = vendas_por_hora_perc_qua,
            visible=False,
            marker_color=colors2,
            name='Quarta'
)
trace_qui = go.Bar(
            x=vendas_por_hora_perc_qui.index,
            y=vendas_por_hora_perc_qui.values,
            text = vendas_por_hora_perc_qui,
            visible=False,
            marker_color=colors2,
            name='Quinta'
)
trace_sex = go.Bar(
           x=vendas_por_hora_perc_sex.index,
            y=vendas_por_hora_perc_sex.values,
            text = vendas_por_hora_perc_sex,
            visible=False,
            marker_color=colors2,
            name='Sexta'
)
trace_sab = go.Bar(
            x=vendas_por_hora_perc_sab.index,
            y=vendas_por_hora_perc_sab.values,
            text = vendas_por_hora_perc_sab,
            visible=False,
            marker_color=colors2,
            name='Sábado'
)




data_vend_hor_dia = [trace_semana, trace_dom, trace_seg, trace_ter, trace_qua, trace_qui, trace_sex, trace_sab]
In [114]:
updatemenus_ven_hor_dia = list([

    dict
        (
         buttons=list([   
            
                dict(label = 'Semana',
                 method = 'update',
                 args = [{'visible': [True, False, False, False, False, False, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Semana</sup>'}]),
             
                dict(label = 'Domingo',
                 method = 'update',
                 args = [{'visible': [False, True, False, False, False, False, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Domingo</sup>'}]),
             
                dict(label = 'Segunda',
                 method = 'update',
                 args = [{'visible': [False, False, True, False, False, False, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Segunda</sup>'}]),
             
                dict(label = 'Terça',
                 method = 'update',
                 args = [{'visible': [False, False, False, True, False, False, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Terça</sup>'}]),
            
                dict(label = 'Quarta',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, True, False, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Quarta</sup>'}]),

                dict(label = 'Quinta',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, True, False, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Quinta</sup>'}]),

                dict(label = 'Sexta',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, True, False]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Sexta</sup>'}]),

                dict(label = 'Sábado',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, False, True]},
                         {'title': 'Percentual das vendas por hora - 2022<br><sup>Sábado</sup>'}])
        
                
         ]),
        type='buttons',
        direction = 'right',
        pad = {'r': 10, 't': 10},
        showactive = True,
        x = 0.3,
        xanchor = 'left',
        y = button_layer_1_height,
        yanchor = 'top' 
        )
])
                
In [115]:
layout_ven_hor_dia = dict(title='Percentual das vendas por hora - 2022 <br><sup>Semana</sup>', showlegend=False, 
                          updatemenus= updatemenus_ven_hor_dia)

# to change the background color

layout_ven_hor_dia.update(
    plot_bgcolor='lightcyan', 
    xaxis =dict(title = "Hora do dia", mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan'),
    yaxis =dict(title = "Percentual (%)", mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightcyan')
)
In [184]:
fig_16B = dict(data=data_vend_hor_dia, layout=layout_ven_hor_dia)

py.plot(fig_16B, filename='/home/claudio/results/grafico016B - percentual_vendas_hora_dia.html')
Out[184]:
'/home/claudio/results/grafico016B - percentual_vendas_hora_dia.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001
/usr/share/atom/atom: bad option: --pid=1885

Grafico 17¶

Quantidade de itens vendidos por dia da semana e período¶

In [117]:
group_by_data_e_hora_do_dia = transactions_filtrada.groupby(by=['dia_da_semana','hora_do_dia'])['item_count'].sum()
In [118]:
# https://stackoverflow.com/questions/39275294/sort-by-certain-order-situation-pandas-dataframe-groupby
# #### Alterando a ordem de apresentação dos dias da semana(Seg - Dom), usando a lista 'dias' anteriormente criada

group_by_data_e_hora_do_dia = group_by_data_e_hora_do_dia.reindex(dias, level= 'dia_da_semana')
In [119]:
group_by_data_e_hora_do_dia = pd.DataFrame(group_by_data_e_hora_do_dia).reset_index()
In [120]:
group_by_data_e_hora_do_dia
Out[120]:
dia_da_semana hora_do_dia item_count
0 segunda manhã 587
1 segunda noite 1354
2 segunda tarde 548
3 terça manhã 545
4 terça noite 1359
5 terça tarde 619
6 quarta manhã 642
7 quarta noite 1403
8 quarta tarde 811
9 quinta manhã 526
10 quinta noite 1459
11 quinta tarde 729
12 sexta manhã 744
13 sexta noite 1575
14 sexta tarde 669
15 sábado manhã 1512
16 sábado noite 2096
17 sábado tarde 986
18 domingo manhã 2310
19 domingo noite 1793
20 domingo tarde 932
In [121]:
group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].item_count
Out[121]:
0      587
3      545
6      642
9      526
12     744
15    1512
18    2310
Name: item_count, dtype: int64
In [122]:
trace_manhã = go.Bar(
    y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].item_count,
    x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'manhã'].dia_da_semana,
    name = "manhã",
    opacity=0.6,
)

trace_tarde = go.Bar(
   y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'tarde'].item_count,
    x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'tarde'].dia_da_semana,
    name = "tarde",
    opacity=0.6
)

trace_noite = go.Bar(
   y = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'noite'].item_count,
    x = group_by_data_e_hora_do_dia[group_by_data_e_hora_do_dia.hora_do_dia == 'noite'].dia_da_semana,
    name = "noite",
    opacity=0.6
)

layout = go.Layout(title={'text': 'Vendas por período e dia da semana - 2022 (Fev - Out) ','y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top' },
                  xaxis = dict(title = 'Dia da Semana', showline=True, linecolor='black', gridcolor='lightcyan'),
                  yaxis = dict(title = 'Número de Itens Vendidos',mirror=True, ticks='outside', showline=True, linecolor='black', gridcolor='lightblue'), 
                  plot_bgcolor='lightcyan',
                  hovermode= 'x')

data = [trace_manhã, trace_tarde, trace_noite]

fig = go.Figure(data = data, layout = layout)  

py.iplot(fig, filename='/home/claudio/results/grafico17 - vendas-por-dia-periodo.html')

Grafico 18¶

Vendas por hora¶

In [123]:
vendas_por_hora = pd.DataFrame(
                transactions_filtrada[(transactions_filtrada.hora > 1) & 
                (transactions_filtrada.hora < 23)].groupby(by='hora')['item_count'].sum())
In [124]:
vendas_por_hora
Out[124]:
item_count
hora
6 41
7 524
8 1289
9 1749
10 1817
11 1446
12 737
13 627
14 769
15 1249
16 1912
17 3279
18 2917
19 2416
20 2094
21 333
In [125]:
trace = go.Scatter(x=list(vendas_por_hora.index),
                   y=list(vendas_por_hora.item_count),
                   fill = 'tozeroy',
                   fillcolor = 'lightblue')

data = [trace]

layout = dict(
    title= {'text' : 'Itens vendidos por hora - 2022 (Fev - Out)', 'y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
    yaxis = dict( title = 'Volume de Itens Vendidos', gridcolor='lightblue'),
    xaxis=dict( title = 'Hora',
        rangeselector=dict(
            buttons=list([
                dict(count = 12,
                     label='Manhã',
                     step='hour',
                     stepmode='backward'),
                dict(count = 8,
                     label='Tarde',
                     step='hour',
                     stepmode='backward'),
                dict(count = 4,
                    label='Noite',
                    step='hour',
                    stepmode='backward'),
                dict(step='all')
            ]),
            visible = True
        ),
        rangeslider=dict(
            visible = True
        ),
    gridcolor='lightcyan'),
    plot_bgcolor='lightcyan'
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = '/home/claudio/results/grafico18 - vendas-por-hora.html')

Apriori Algorithm, Method 1¶

All day results¶

In [126]:
# Create a list of lists that is conducive to the format expected from apyori

list_of_lists = []

for transaction in list(set(transactions_filtrada.venda)):
    
    df = transactions_filtrada[transactions_filtrada.venda == transaction]
    values = df.produto.values
    
    list_of_lists.append(list(values))
In [127]:
# imprting the 'apriori' function from the 'apyori' module
from apyori import apriori

# Create a list of association rules
association_rules = apriori(list_of_lists, min_confidence = 0.2, min_support = 0.005,min_lift = 2, min_length=2)  
association_results = list(association_rules)
In [128]:
# print(association_results)
In [129]:
for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")
Rule: ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO
Support: 0.0061068702290076335
Confidence: 0.5416666666666666
Lift: 70.95833333333333
=====================================
Rule: CROISSANT -> ADICIONAL DE MUSSARELA
Support: 0.009395184967704051
Confidence: 0.8333333333333333
Lift: 4.038607474862455
=====================================
Rule: CROISSANT -> ADICIONAL DE PRESUNTO
Support: 0.006928948913681738
Confidence: 0.9076923076923077
Lift: 4.398975526465566
=====================================
Rule: CROISSANT -> AGUA MINERAL S/GAS 500ML
Support: 0.01996476805637111
Confidence: 0.4197530864197531
Lift: 2.0342615428936814
=====================================
Rule: PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS AVEIA
Support: 0.007868467410452143
Confidence: 0.24814814814814815
Lift: 9.107678799489145
=====================================
Rule: CROISSANT -> CAFE CATUAI VERMELHO C/LEITE G
Support: 0.007868467410452143
Confidence: 0.43506493506493504
Lift: 2.1084677985645546
=====================================
Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML
Support: 0.013740458015267175
Confidence: 0.46799999999999997
Lift: 2.2680819578827545
=====================================
Rule: CROISSANT -> CAFE COLMEIA C/ LEITE G 200ML
Support: 0.00551967116852613
Confidence: 0.5280898876404494
Lift: 2.5592973211487915
=====================================
Rule: CROISSANT -> CAFE COLMEIA G 200ML
Support: 0.007633587786259542
Confidence: 0.4276315789473684
Lift: 2.0724433094688917
=====================================
Rule: CROISSANT -> LARANJA 400 ML
Support: 0.021374045801526718
Confidence: 0.6275862068965518
Lift: 3.041489215551587
=====================================
Rule: CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA 400ML
Support: 0.005989430416911333
Confidence: 0.2451923076923077
Lift: 5.394864341085272
=====================================
Rule: CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD
Support: 0.005167351732237228
Confidence: 0.4356435643564356
Lift: 9.391151773405188
=====================================
Rule: MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT 25G KIT C 5
Support: 0.00540223135642983
Confidence: 0.2358974358974359
Lift: 5.046901172529314
=====================================
Rule: CROISSANT -> ADICIONAL DE MUSSARELA
Support: 0.005637110980622431
Confidence: 0.5
Lift: 72.16101694915254
=====================================
In [130]:
rule_list = []
for item in association_results:
    
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]

    rule_list.append ( items[0] + " -> " + items[1])

support_list =[]
for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    #items = [x for x in pair]
    

    #second index of the inner list
    support_list.append( str(item[1]))
confidence_list =[]
for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    

    #second index of the inner list
    confidence_list.append(str(item[2][0][2]))
lift_list =[]
for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    
    
    #second index of the inner list
    lift_list.append(str(item[2][0][3]))

Convert a list to a Dataframe¶

In [131]:
association_results_df=pd.DataFrame(list
(zip(rule_list, support_list, confidence_list, lift_list)), columns=['Rule','Support', 'Confidence', 'Lift'])
In [132]:
association_results_df
Out[132]:
Rule Support Confidence Lift
0 ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO 0.0061068702290076335 0.5416666666666666 70.95833333333333
1 CROISSANT -> ADICIONAL DE MUSSARELA 0.009395184967704051 0.8333333333333333 4.038607474862455
2 CROISSANT -> ADICIONAL DE PRESUNTO 0.006928948913681738 0.9076923076923077 4.398975526465566
3 CROISSANT -> AGUA MINERAL S/GAS 500ML 0.01996476805637111 0.4197530864197531 2.0342615428936814
4 PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS A... 0.007868467410452143 0.24814814814814815 9.107678799489145
5 CROISSANT -> CAFE CATUAI VERMELHO C/LEITE G 0.007868467410452143 0.43506493506493504 2.1084677985645546
6 CROISSANT -> CAFE CATUAI VERMELHO G 200ML 0.013740458015267175 0.46799999999999997 2.2680819578827545
7 CROISSANT -> CAFE COLMEIA C/ LEITE G 200ML 0.00551967116852613 0.5280898876404494 2.5592973211487915
8 CROISSANT -> CAFE COLMEIA G 200ML 0.007633587786259542 0.4276315789473684 2.0724433094688917
9 CROISSANT -> LARANJA 400 ML 0.021374045801526718 0.6275862068965518 3.041489215551587
10 CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA... 0.005989430416911333 0.2451923076923077 5.394864341085272
11 CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD 0.005167351732237228 0.4356435643564356 9.391151773405188
12 MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT ... 0.00540223135642983 0.2358974358974359 5.046901172529314
13 CROISSANT -> ADICIONAL DE MUSSARELA 0.005637110980622431 0.5 72.16101694915254

Grafico 19¶

In [185]:
fig_apriori_m1= go.Figure(data=[go.Table(
    header=dict(values=list(['Regra', 'Suporte','Confiança','Incremento'] ),
                font=dict(color='white', size=12), # configurando a cor e tamanho da letra
                fill_color='seagreen',
                align='left'),
    cells=dict(values=[association_results_df['Rule'], association_results_df['Support'], association_results_df['Confidence'], association_results_df['Lift']],
               fill_color='lightcyan',
               align='left'))
])

fig_apriori_m1.update_layout(
    title={
        'text': "Aplicando o algoritmo Apriori - Modelo 01 - 2022 (Fev -Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.plot(fig_apriori_m1, filename='/home/claudio/results/grafico19 - Aplicando_Apriori_Modelo_01.html')
Out[185]:
'/home/claudio/results/grafico19 - Aplicando_Apriori_Modelo_01.html'
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001
/usr/share/atom/atom: bad option: --pid=2323

Resultados - Manhã¶

In [134]:
manha_filter = (transactions_filtrada['hora_do_dia']=='manhã')
transactions_filtrada_manha = transactions_filtrada[manha_filter]
# Create a list of lists that is conducive to the format expected from apyori


list_of_lists_manha = []

for transaction in list(set(transactions_filtrada_manha.venda)):
    
    df = transactions_filtrada_manha[transactions_filtrada_manha.venda == transaction]
    values = df.produto.values
    
    list_of_lists_manha.append(list(values))
expr: syntax error: missing argument after ‘8’
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001
/usr/share/atom/atom: bad option: --pid=524
In [135]:
# Create a list of association rules
association_rules_manha = apriori(list_of_lists_manha, min_confidence = 0.2, min_support = 0.005,min_lift = 3, min_length=3)  
association_results_manha = list(association_rules_manha)

Resultado da função "associantion_results_manha" período Manhã¶

In [136]:
for item in association_results_manha:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")
Rule: ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO
Support: 0.009764758100310697
Confidence: 0.5945945945945946
Lift: 40.5945945945946
=====================================
Rule: ADICIONAL DE MUSSARELA -> AGUA MINERAL S/GAS 500ML
Support: 0.005326231691078562
Confidence: 0.3243243243243244
Lift: 5.294947121034079
=====================================
Rule: CROISSANT -> ADICIONAL DE MUSSARELA
Support: 0.015090989791389259
Confidence: 0.918918918918919
Lift: 3.344627341396324
=====================================
Rule: CROISSANT -> ADICIONAL DE PRESUNTO
Support: 0.013315579227696404
Confidence: 0.9090909090909091
Lift: 3.3088559259803203
=====================================
Rule: CROISSANT BRIE PARMA MEL DE -> AGUA MINERAL SEM GAS 500 ML
Support: 0.007545494895694629
Confidence: 0.2
Lift: 5.006666666666667
=====================================
Rule: PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS AVEIA
Support: 0.024411895250776743
Confidence: 0.2972972972972973
Lift: 3.9170222854433385
=====================================
Rule: SELECAO DE PAES GELEIA -> BOWL DE IOGURTE FRUTAS AVEIA
Support: 0.010208610741233911
Confidence: 0.2674418604651163
Lift: 3.2570081709616594
=====================================
Rule: CAFE CATUAI VERMELHO C/LEITE G -> LARANJA 400 ML
Support: 0.007989347536617843
Confidence: 0.23684210526315788
Lift: 3.629967776584318
=====================================
Rule: CROISSANT IFOOD -> COOKIES 80G IFOOD
Support: 0.005770084332001775
Confidence: 0.619047619047619
Lift: 7.621389539422324
=====================================
Rule: CROISSANT BRIE PARMA MEL DE -> CROISSANT OVOS BACON MEL
Support: 0.007989347536617843
Confidence: 0.2
Lift: 4.506
=====================================
Rule: CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA 400ML
Support: 0.00887705281846427
Confidence: 0.22222222222222224
Lift: 6.032128514056224
=====================================
Rule: CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD
Support: 0.008433200177541057
Confidence: 0.5428571428571428
Lift: 6.683372365339578
=====================================
Rule: CROISSANT IFOOD -> SOURDOUGH 369G IFOOD
Support: 0.008433200177541057
Confidence: 0.59375
Lift: 7.309938524590163
=====================================
Rule: CROISSANT MUSSATRELA DE -> SUCO DE LARANJA 400ML
Support: 0.006657789613848202
Confidence: 0.2459016393442623
Lift: 6.674896306537625
=====================================
Rule: SUCO DE LARANJA 400ML -> CROISSANT OVOS BACON MEL
Support: 0.008433200177541057
Confidence: 0.2289156626506024
Lift: 5.157469879518072
=====================================
Rule: MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT 25G KIT C 5
Support: 0.007101642254771416
Confidence: 0.2857142857142857
Lift: 5.646616541353383
=====================================
Rule: CROISSANT -> ADICIONAL DE MUSSARELA
Support: 0.009320905459387484
Confidence: 0.5675675675675677
Lift: 42.624324324324334
=====================================
Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML
Support: 0.006657789613848202
Confidence: 0.2054794520547945
Lift: 3.354675402025015
=====================================
Rule: CROISSANT -> LARANJA 400 ML
Support: 0.006213936972924989
Confidence: 0.29166666666666663
Lift: 4.470238095238095
=====================================
Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML
Support: 0.006657789613848202
Confidence: 0.2054794520547945
Lift: 3.149287112105116
=====================================
Rule: CROISSANT -> CAFE CATUAI VERMELHO G 200ML
Support: 0.005326231691078562
Confidence: 0.3076923076923077
Lift: 5.590570719602978
=====================================
Rule: CROISSANT -> PAO SOURDOUGH OVOS
Support: 0.005326231691078562
Confidence: 0.3076923076923077
Lift: 4.71585557299843
=====================================
Rule: CROISSANT -> SANDUICHE DE PEPERONI BRIE
Support: 0.006213936972924989
Confidence: 0.3783783783783784
Lift: 3.1928332827209234
=====================================
In [137]:
rule_list_manha = []
for item in association_results_manha:
    
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]

    rule_list_manha.append ( items[0] + " -> " + items[1])

support_list_manha =[]
for item in association_results_manha:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    #items = [x for x in pair]
    

    #second index of the inner list
    support_list_manha.append( str(item[1]))

confidence_list_manha =[]
for item in association_results_manha:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    

    #second index of the inner list
    confidence_list_manha.append(str(item[2][0][2]))

lift_list_manha =[]
for item in association_results_manha:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    
    
    #second index of the inner list
    lift_list_manha.append(str(item[2][0][3]))

Converting the list to dataframe - Manhã¶

In [138]:
association_results_manha_df=pd.DataFrame(list
(zip(rule_list_manha, support_list_manha, confidence_list_manha, lift_list_manha)), columns=['Rule','Support', 'Confidence', 'Lift'])
In [139]:
association_results_manha_df.sort_values(by=['Support'],ascending=False)
Out[139]:
Rule Support Confidence Lift
5 PAO SOURDOUGH OVOS -> BOWL DE IOGURTE FRUTAS A... 0.024411895250776743 0.2972972972972973 3.9170222854433385
2 CROISSANT -> ADICIONAL DE MUSSARELA 0.015090989791389259 0.918918918918919 3.344627341396324
3 CROISSANT -> ADICIONAL DE PRESUNTO 0.013315579227696404 0.9090909090909091 3.3088559259803203
6 SELECAO DE PAES GELEIA -> BOWL DE IOGURTE FRUT... 0.010208610741233911 0.2674418604651163 3.2570081709616594
0 ADICIONAL DE MUSSARELA -> ADICIONAL DE PRESUNTO 0.009764758100310697 0.5945945945945946 40.5945945945946
16 CROISSANT -> ADICIONAL DE MUSSARELA 0.009320905459387484 0.5675675675675677 42.624324324324334
10 CROISSANT BRIE PARMA MEL DE -> SUCO DE LARANJA... 0.00887705281846427 0.22222222222222224 6.032128514056224
14 SUCO DE LARANJA 400ML -> CROISSANT OVOS BACON MEL 0.008433200177541057 0.2289156626506024 5.157469879518072
12 CROISSANT IFOOD -> SOURDOUGH 369G IFOOD 0.008433200177541057 0.59375 7.309938524590163
11 CROISSANT IFOOD -> PASTEL DE NATA 60G IFOOD 0.008433200177541057 0.5428571428571428 6.683372365339578
9 CROISSANT BRIE PARMA MEL DE -> CROISSANT OVOS ... 0.007989347536617843 0.2 4.506
7 CAFE CATUAI VERMELHO C/LEITE G -> LARANJA 400 ML 0.007989347536617843 0.23684210526315788 3.629967776584318
4 CROISSANT BRIE PARMA MEL DE -> AGUA MINERAL SE... 0.007545494895694629 0.2 5.006666666666667
15 MINI CROISSANT PAIN DE CHOC -> MINI CROISSANT ... 0.007101642254771416 0.2857142857142857 5.646616541353383
13 CROISSANT MUSSATRELA DE -> SUCO DE LARANJA 400ML 0.006657789613848202 0.2459016393442623 6.674896306537625
17 CROISSANT -> CAFE CATUAI VERMELHO G 200ML 0.006657789613848202 0.2054794520547945 3.354675402025015
19 CROISSANT -> CAFE CATUAI VERMELHO G 200ML 0.006657789613848202 0.2054794520547945 3.149287112105116
18 CROISSANT -> LARANJA 400 ML 0.006213936972924989 0.29166666666666663 4.470238095238095
22 CROISSANT -> SANDUICHE DE PEPERONI BRIE 0.006213936972924989 0.3783783783783784 3.1928332827209234
8 CROISSANT IFOOD -> COOKIES 80G IFOOD 0.005770084332001775 0.619047619047619 7.621389539422324
1 ADICIONAL DE MUSSARELA -> AGUA MINERAL S/GAS 5... 0.005326231691078562 0.3243243243243244 5.294947121034079
20 CROISSANT -> CAFE CATUAI VERMELHO G 200ML 0.005326231691078562 0.3076923076923077 5.590570719602978
21 CROISSANT -> PAO SOURDOUGH OVOS 0.005326231691078562 0.3076923076923077 4.71585557299843

Apriori Algorithm, Method 2¶

In [140]:
def apriori_results_method_2(df = transactions_filtrada, min_support = 0.005, hora_do_dia = None, dia_da_semana = None,
                            min_threshold = 1, metric = 'lift'):
    
    """
    Takes in a data frame and returns the results of the Apriori algorithm using the mlxtend Python package.
    """
    
    from mlxtend.frequent_patterns import apriori
    from mlxtend.frequent_patterns import association_rules

    if hora_do_dia:
        
        df = df[df.hora_do_dia == hora_do_dia]
        
    if dia_da_semana:
        
        df = df[df.tipo_de_dia == dia_da_semana]
        
    dummies_df = df.groupby(['venda', 'produto'])['produto'].count().unstack().\
                 reset_index().fillna(0).set_index('venda')
    
    def encode_units(x):
    
        if x <= 0:
            return 0
        if x >= 1:
            return 1
    
    dummies_df = dummies_df.applymap(encode_units)
    
    frequent_itemsets = apriori(dummies_df.astype('bool'), min_support= min_support, use_colnames=True)
# DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type"
# https://stackoverflow.com/questions/74114745/how-to-fix-deprecationwarning-dataframes-with-non-bool-types-result-in-worse-c
    
    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
    
    rules.drop_duplicates(subset='leverage', keep='first', inplace=True)
    rules = rules.sort_values(by='lift', ascending=False)
    
    return rules

Transformando o rsultado da função "apriori_results_method_2" em um DATAFRAME¶

In [141]:
all_day_method_2 = pd.DataFrame(apriori_results_method_2()).reset_index()
del all_day_method_2["index"]

Fazendo a limpeza do DATAFRAME¶

In [142]:
#the first two columns when converted to the dataframe are in the "Frozenset" format, and turn out a error message, when run Plotly to plot the dataframe into a table 
In [143]:
cols_to_check = ['antecedents','consequents']
In [144]:
all_day_method_2['antecedents']= all_day_method_2['antecedents'].apply(set) # converting the column from Frozenset to set
In [145]:
all_day_method_2['consequents']= all_day_method_2['consequents'].apply(set) # converting the column from Frozenset to set
In [146]:
all_day_method_2[cols_to_check]=all_day_method_2[cols_to_check].astype('string') # converting the two columns into string types
In [147]:
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"{":""}, regex=True) # deleting the symbols in the columns
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"'":""}, regex=True)
all_day_method_2[cols_to_check] = all_day_method_2[cols_to_check].replace({"}":""}, regex=True)
In [148]:
all_day_method_2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         60 non-null     string 
 1   consequents         60 non-null     string 
 2   antecedent support  60 non-null     float64
 3   consequent support  60 non-null     float64
 4   support             60 non-null     float64
 5   confidence          60 non-null     float64
 6   lift                60 non-null     float64
 7   leverage            60 non-null     float64
 8   conviction          60 non-null     float64
dtypes: float64(7), string(2)
memory usage: 4.3 KB
In [149]:
all_day_method_2.head()
Out[149]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction
0 CROISSANT, ADICIONAL DE MUSSARELA ADICIONAL DE PRESUNTO 0.009395 0.007634 0.005637 0.600000 78.600000 0.005565 2.480916
1 CROISSANT, ADICIONAL DE PRESUNTO ADICIONAL DE MUSSARELA 0.006929 0.011274 0.005637 0.813559 72.161017 0.005559 5.303166
2 ADICIONAL DE MUSSARELA ADICIONAL DE PRESUNTO 0.011274 0.007634 0.006107 0.541667 70.958333 0.006021 2.165163
3 CROISSANT IFOOD PASTEL DE NATA 60G IFOOD 0.046389 0.011861 0.005167 0.111392 9.391152 0.004617 1.112008
4 PAO SOURDOUGH OVOS BOWL DE IOGURTE FRUTAS AVEIA 0.027246 0.031709 0.007868 0.288793 9.107679 0.007005 1.361476

Grafico 20¶

In [150]:
fig_apriori_m2= go.Figure(
    data=
    [
    go.Table
    (
    header=dict
    (
    values=list
    ([
    "antecedents","consequents","antecedent support","consequent support","support","confidence","lift","leverage", "conviction"
    ]),
    font=dict(color='white', size=12), # configurando a cor e tamanho da letra
    fill_color='seagreen',
    align='left'
    ), 
    cells=dict
    (
        values=
        [
                all_day_method_2['antecedents'], 
                all_day_method_2['consequents'], 
                all_day_method_2['antecedent support'],
                all_day_method_2['consequent support'],
                all_day_method_2['support'],
                all_day_method_2['confidence'], 
                all_day_method_2['lift'], 
                all_day_method_2['leverage'],
                all_day_method_2['conviction'],
        ], 
        fill_color='lightcyan',
        align='left'
    )
    )
    ]
    )

fig_apriori_m2.update_layout(
    title={
        'text': "Aplicando o algoritmo Apriori - Modelo 02 - 2022 (Fev -Out)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

py.plot(fig_apriori_m2, filename='/home/claudio/results/grafico20 - Apriori_Method_02_all_Day.html')
Out[150]:
'/home/claudio/results/grafico20 - Apriori_Method_02_all_Day.html'
In [151]:
morning_method_2 = apriori_results_method_2(hora_do_dia = 'manhã')
/usr/share/atom/atom: bad option: --executed-from=/home/claudio/farina_in_natura001
/usr/share/atom/atom: bad option: --pid=669
expr: syntax error: missing argument after ‘8’
In [152]:
afternoon_method_2 = apriori_results_method_2(hora_do_dia = 'tarde')
In [153]:
evening_method_2 = apriori_results_method_2(hora_do_dia = 'noite')
In [154]:
weekend_method_2 = apriori_results_method_2(dia_da_semana = 'FDS')
In [155]:
weekday_method_2 = apriori_results_method_2(dia_da_semana = 'DDS')
In [156]:
weekend_morning_method_2 = apriori_results_method_2(dia_da_semana = 'FDS', hora_do_dia = 'manhã')
In [157]:
weekday_afternoon_method_2 = apriori_results_method_2(dia_da_semana = 'DDS', hora_do_dia = 'tarde')
In [158]:
weekday_afternoon_method_2.groupby(by='antecedents')['lift'].sum().sort_values(ascending=False)
Out[158]:
antecedents
(CROISSANT, ADICIONAL DE MUSSARELA)             132.319444
(CROISSANT, ADICIONAL DE PRESUNTO)              123.727273
(ADICIONAL DE MUSSARELA)                        123.727273
(CROISSANT)                                      37.912645
(PASTEL DE NATA 60G)                             27.373780
(CROISSANT IFOOD)                                13.610000
(COOKIES 80G)                                    12.801238
(PASTEL DE NATA 60G, BOMBOLONI NUTELLA 80G)      11.864259
(FOLHADO FRANGO 160G)                            11.376274
(FOLHADO FRANGO 160G, PASTEL DE NATA 60G)        10.807043
(BOMBOLONI NUTELLA 80G)                           8.332997
(SOURDOUGH AZEITONA E ALECRIM)                    7.995589
(COCA COLA LATA 350 ML)                           6.903623
(CROISSANT  TRADICIONAL 80 G)                     5.705271
(CAFE EQUILIBRADO CAPSULA P)                      5.242681
(FOLHADO MACA 80G)                                5.070133
(CROISSANT  AMENDOAS 100G)                        4.930859
(FOLHADO FRANGO 160G, BOMBOLONI NUTELLA 80G)      4.472770
(MINI CROISSANT PAIN DE CHOC)                     4.455655
(CROISSANT, PASTEL DE NATA 60G)                   3.434391
(CROISSANT, FOLHADO FRANGO 160G)                  2.354089
(PIZZA AL TAGLIO GORGONZOLA E)                    2.223856
(MINI CROISSANT 25G KIT C 5)                      1.972464
(MINI CROISSANT 25G)                              1.825620
(SOURDOUGH SEMI INTEGRAL 370G)                    1.779085
Name: lift, dtype: float64